library(here)
## here() starts at /Users/adelheid/Documents/MEDS/EDS_222/eds222_final_project
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(tmap)
library(lfe)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

Background

Data Wrangling: All Species

Reading In Data

I used data from two sources: the California Department of Fish and Wildlife and the California Protected Areas Database.

# Load the salmonid population monitoring table from the project's data folder.
salmon_populations <- here("data/Salmonid_Population_Monitoring_Data_CMPv2021.csv") |>
  read_csv()
## New names:
## Rows: 3918 Columns: 26
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (18): Watershed, Population, Species, Life Stage, Origin, Run designatio... dbl
## (4): ID, CDFW region, GEO_ID_POLY, GEO_ID_PT num (3): Value, X95 lower CI, X95
## upper CI lgl (1): ...26
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...26`
# Read the watershed polygons. The geodatabase contains more than one layer,
# so the layer is named explicitly instead of relying on the first layer
# being picked automatically (which also raised a warning).
watershed <- st_read(here("data/ds3001/ds3001.gdb"), layer = "ds3001")
## Reading layer `ds3001' from data source 
##   `/Users/adelheid/Documents/MEDS/EDS_222/eds222_final_project/data/ds3001/ds3001.gdb' 
##   using driver `OpenFileGDB'
## Simple feature collection with 157 features and 5 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -370211.5 ymin: -445468.7 xmax: 179047.7 ymax: 465217.3
## Projected CRS: NAD83 / California Albers
# Load the CPAD holdings polygons (the protected areas).
protected_areas <- here("data/CPAD_2022a/CPAD_2022a_Holdings.dbf") |>
  st_read()
## Reading layer `CPAD_2022a_Holdings' from data source 
##   `/Users/adelheid/Documents/MEDS/EDS_222/eds222_final_project/data/CPAD_2022a/CPAD_2022a_Holdings.dbf' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 94534 features and 35 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -374984.2 ymin: -604454.8 xmax: 540016.3 ymax: 449743.2
## Projected CRS: NAD83 / California Albers

Filtering to Adult Salmon Data

In this section I refine the data. For this study I am interested in adult population counts, so I select only the data on adults.

Not every watershed polygon provided by the CMP has count data. I remove the polygons with no associated adult data: of the 157 watershed polygons, I have adult counts for 110.

# Adult records only, restricted to rows that carry a watershed polygon id,
# reduced to the columns used later (Brood Year is renamed to Brood_year).
spawning_data <- salmon_populations |>
  filter(`Life Stage` == "Adult", !is.na(GEO_ID_POLY)) |>
  select(
    Population, Watershed, Species,
    Brood_year = `Brood Year`,
    GEO_ID_POLY, Value, Metric, `Estimation method`
  )

# Polygon ids of every watershed that has at least one adult record.
watershed_id <- unique(spawning_data[["GEO_ID_POLY"]])

# Watershed polygons with adult data available, with invalid geometries repaired.
watershed_new <- st_make_valid(filter(watershed, GEO_ID_POL %in% watershed_id))

# Only the unit name and YR_EST (presumably the year established) are needed
# from the protected-areas layer; geometries are repaired here as well.
protected <- st_make_valid(select(protected_areas, UNIT_NAME, YR_EST))

Filtering Protected Areas

# Keep holdings with YR_EST before 1981, plus those with no recorded year
# (NA); everything dated 1981 or later is dropped.
protected2 <- protected |>
  filter(YR_EST < 1981 | is.na(YR_EST))

Calculating Percent Protected: Geospatial Wrangling

# Clip the filtered protected areas to the study watersheds. Each resulting
# piece keeps the Name and GEO_ID_POL of the watershed it falls in.
intersect_polygons <- protected2 |>
  st_intersection(watershed_new) |>
  dplyr::select(Name, GEO_ID_POL)
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
# Total protected area per watershed. Pieces are grouped on the polygon id
# (GEO_ID_POL), the same key used further down to attach the salmon counts, so
# two watersheds that happen to share a Name are never merged. Name stays in
# the grouping only so the column survives the summary.
total_overlap <- intersect_polygons |>
  group_by(GEO_ID_POL, Name) |>
  summarize(geometry = st_union(geometry), .groups = "drop") |> # combine pieces within a watershed
  mutate(total_protected = st_area(geometry))

# Attribute-only copy, used for joining back onto the watershed polygons.
total_overlap_geomless <- st_drop_geometry(total_overlap)

# Total area of each study watershed.
watershed_area <- watershed_new |>
  mutate(total_area = st_area(Shape))

# Add the protected-area column; watersheds with no intersecting protected
# area get NA for total_protected.
watershed_protected <- left_join(
  watershed_area, total_overlap_geomless,
  by = c("GEO_ID_POL", "Name")
)

# Percent of each watershed that is protected, rounded to a whole percent.
# Watersheds with no protected area (NA after the join) are set to 0.
watershed_final <- watershed_protected |>
  mutate(
    percent_protected = as.numeric(total_protected) / as.numeric(total_area) * 100,
    percent_protected = round(percent_protected, digits = 0),
    percent_protected = replace_na(percent_protected, 0)
  )

# Plain data frame of watershed attributes: no geometry, no Method_Typ column.
watershed_geomless <- watershed_final |>
  st_drop_geometry() |>
  as.data.frame() |>
  select(-Method_Typ)

# Attach percent protected to every adult record via the watershed polygon id.
all_data <- spawning_data |>
  left_join(watershed_geomless, by = c("GEO_ID_POLY" = "GEO_ID_POL")) |>
  select(
    Population, Species, Value, percent_protected, Name,
    Brood_year, Metric, `Estimation method`
  )
# Interactive map: protected portions filled in green, study watershed
# boundaries outlined in blue.
tmap_mode("view")
## tmap mode set to interactive viewing
tmap_options(check.and.fix = TRUE)

tm_shape(total_overlap) +
  tm_fill(col = "#004600") + # protected portions
  tm_shape(watershed_new) +
  tm_borders(col = "blue") + # watershed boundaries
  tm_add_legend(
    labels = c("Watershed Boundary", "Protected area"),
    col = c("blue", "#004600")
  )
## Warning: The shape total_overlap is invalid (after reprojection). See
## sf::st_is_valid

Steelhead

Data Wrangling

# Steelhead records only.
steelhead <- all_data |>
  filter(Species == "Steelhead")

# One row per population and brood year. When a year has several estimates,
# the largest one is kept (max, not the mean); percent_protected is averaged.
# `.groups = "drop"` returns an ungrouped tibble so later steps do not
# silently inherit the Population grouping.
steelhead_summary <- steelhead |>
  group_by(Population, Brood_year) |>
  summarize(
    Value = max(Value),
    percent_protected = mean(percent_protected),
    .groups = "drop"
  )

# First year, last year and number of brood years available per population.
data_years <- steelhead_summary |>
  group_by(Population) |>
  summarize(
    min_year = min(Brood_year),
    max_year = max(Brood_year),
    total_year = n()
  )

Regression

\(\text{populationCount}_{it} = \beta_0 + \beta_1 \text{year}_t + \beta_2 \left(\text{year}_t \times \text{percentProtected}_i\right) + \beta_3 \text{percentProtected}_i + \varepsilon_{it}\)

# Pooled OLS on all steelhead observations: count against year, percent
# protected and their interaction. Brood year is converted to numeric first.
steelhead_all <- mutate(steelhead_summary, year = as.numeric(Brood_year))

lm(
  Value ~ year + year:percent_protected + percent_protected,
  data = steelhead_all
) |>
  summary()
## 
## Call:
## lm(formula = Value ~ year + year:percent_protected + percent_protected, 
##     data = steelhead_all)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
##  -809.5  -427.1  -323.9   -95.1 15775.6 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)   
## (Intercept)            -9.276e+04  2.827e+04  -3.281  0.00108 **
## year                    4.631e+01  1.404e+01   3.297  0.00102 **
## percent_protected       1.371e+03  4.731e+02   2.899  0.00384 **
## year:percent_protected -6.816e-01  2.351e-01  -2.899  0.00384 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1297 on 845 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.01301,    Adjusted R-squared:  0.009509 
## F-statistic: 3.714 on 3 and 845 DF,  p-value: 0.01133
# Same specification with Population projected out as a fixed effect (felm),
# followed by the estimated effect for each population.
model <- felm(
  Value ~ year + year:percent_protected + percent_protected | Population,
  data = steelhead_all
)

getfe(model)
##                                                       effect obs comp
## Population.Albion River                            -66376.87  12    1
## Population.Alder Creek                             -66424.97   1    1
## Population.Aptos Creek                             -66610.31   2    1
## Population.Arroyo Hondo Creek                      -66768.27   1    1
## Population.Arroyo Sequit Creek                     -66791.74  15    1
## Population.Big Basin sub-region                    -66503.35   2    1
## Population.Big Basin/San Mateo region              -65609.83   6    1
## Population.Big River                               -65884.12  12    1
## Population.Big Salmon Creek                        -66313.73   4    1
## Population.Big Sur River                           -66818.57   6    1
## Population.Big Sycamore Creek                      -66846.03   5    1
## Population.Bogus Creek                             -66426.64  12    1
## Population.Brush Creek                             -66379.65   9    1
## Population.Carmel River                            -66560.28  34    1
## Population.Carpinteria Creek                       -66756.72   5    1
## Population.Caspar Creek                            -66911.91  18    1
## Population.Central Coastal Diversity Stratum       -65095.82  11    1
## Population.Cheda Creek                             -66980.61  15    1
## Population.Cottaneva Creek                         -66298.18   5    1
## Population.DeHaven Creek                           -66442.97   1    1
## Population.Dry Creek                               -64040.73   6    1
## Population.Eel River                               -66436.18  39    1
## Population.Elk Creek                               -66329.54   2    1
## Population.Freshwater Creek                        -66141.46  21    1
## Population.Garcia River                            -65899.95  12    1
## Population.Gazos Creek                             -66559.34   7    1
## Population.Greenwood Creek                         -66384.01   4    1
## Population.Gualala River                           -65739.01   1    1
## Population.Hare Creek                              -66763.70   4    1
## Population.Hilton Creek                            -66618.12  19    1
## Population.Humboldt Bay                            -66386.24  11    1
## Population.Juan Creek                              -66414.31   3    1
## Population.Lagunitas Creek                         -66620.60  23    1
## Population.Las Flores Creek                        -66663.16   5    1
## Population.Little River                            -66728.94  20    1
## Population.Lower Eel River and Van Duzen River     -66568.34   1    1
## Population.Lower Mainstem Eel River                -63965.79   2    1
## Population.Malibu Creek                            -66716.74  15    1
## Population.Mattole River                           -66226.07  23    1
## Population.Mendocino Coast                         -62651.36  11    1
## Population.Mill Creek                              -66746.48  10    1
## Population.Navarro River                           -65744.79  12    1
## Population.North Fork Navarro River                -66068.80   9    1
## Population.North-Central Coastal Diversity Stratum -64040.39  11    1
## Population.Noyo River                              -65882.76  20    1
## Population.Olema Creek                             -66937.05  24    1
## Population.Pescadero Creek                         -66209.65   6    1
## Population.Pine Gulch Creek                        -66904.06  21    1
## Population.Prairie Creek                           -66798.83  10    1
## Population.Pudding Creek                           -66209.76  20    1
## Population.Redwood Creek                           -66472.85   8    1
## Population.Redwood Creek (Marin Co)                -67024.18  24    1
## Population.Russian River                           -65792.25  20    1
## Population.Salinas River                           -66448.34   6    1
## Population.Salsipuedes Creek                       -66317.04  19    1
## Population.San Gregorio Creek                      -66478.78   5    1
## Population.San Lorenzo River                       -66256.90   6    1
## Population.San Mateo sub-region                    -66457.54   2    1
## Population.San Vicente Creek                       -66485.26   7    1
## Population.Santa Clara River                       -66691.55  20    1
## Population.Santa Ynez River                        -66574.67  10    1
## Population.Schooner Gulch                          -66410.00   1    1
## Population.Scott Creek                             -66268.95  18    1
## Population.Scott River                             -66323.39  11    1
## Population.Smith River                             -61170.58   6    1
## Population.Solstice Creek                          -66702.35   5    1
## Population.Soquel Creek                            -66605.30   4    1
## Population.South Fork Eel River                    -65733.87  10    1
## Population.South Fork Noyo River                   -66799.79  22    1
## Population.South Fork Ten Mile River               -66299.36   2    1
## Population.Ten Mile River                          -65866.28  13    1
## Population.Topanga Creek                           -66694.20  21    1
## Population.Trancas Creek                           -66702.35   5    1
## Population.Trinity River                           -61773.92  18    1
## Population.Usal Creek                              -66323.60  10    1
## Population.Ventura River                           -66860.43  15    1
## Population.Waddell Creek                           -66834.53   4    1
## Population.Wages Creek                             -66245.58   4    1
## Population.Zuma Creek                              -66702.35   5    1
##                                                            fe
## Population.Albion River                            Population
## Population.Alder Creek                             Population
## Population.Aptos Creek                             Population
## Population.Arroyo Hondo Creek                      Population
## Population.Arroyo Sequit Creek                     Population
## Population.Big Basin sub-region                    Population
## Population.Big Basin/San Mateo region              Population
## Population.Big River                               Population
## Population.Big Salmon Creek                        Population
## Population.Big Sur River                           Population
## Population.Big Sycamore Creek                      Population
## Population.Bogus Creek                             Population
## Population.Brush Creek                             Population
## Population.Carmel River                            Population
## Population.Carpinteria Creek                       Population
## Population.Caspar Creek                            Population
## Population.Central Coastal Diversity Stratum       Population
## Population.Cheda Creek                             Population
## Population.Cottaneva Creek                         Population
## Population.DeHaven Creek                           Population
## Population.Dry Creek                               Population
## Population.Eel River                               Population
## Population.Elk Creek                               Population
## Population.Freshwater Creek                        Population
## Population.Garcia River                            Population
## Population.Gazos Creek                             Population
## Population.Greenwood Creek                         Population
## Population.Gualala River                           Population
## Population.Hare Creek                              Population
## Population.Hilton Creek                            Population
## Population.Humboldt Bay                            Population
## Population.Juan Creek                              Population
## Population.Lagunitas Creek                         Population
## Population.Las Flores Creek                        Population
## Population.Little River                            Population
## Population.Lower Eel River and Van Duzen River     Population
## Population.Lower Mainstem Eel River                Population
## Population.Malibu Creek                            Population
## Population.Mattole River                           Population
## Population.Mendocino Coast                         Population
## Population.Mill Creek                              Population
## Population.Navarro River                           Population
## Population.North Fork Navarro River                Population
## Population.North-Central Coastal Diversity Stratum Population
## Population.Noyo River                              Population
## Population.Olema Creek                             Population
## Population.Pescadero Creek                         Population
## Population.Pine Gulch Creek                        Population
## Population.Prairie Creek                           Population
## Population.Pudding Creek                           Population
## Population.Redwood Creek                           Population
## Population.Redwood Creek (Marin Co)                Population
## Population.Russian River                           Population
## Population.Salinas River                           Population
## Population.Salsipuedes Creek                       Population
## Population.San Gregorio Creek                      Population
## Population.San Lorenzo River                       Population
## Population.San Mateo sub-region                    Population
## Population.San Vicente Creek                       Population
## Population.Santa Clara River                       Population
## Population.Santa Ynez River                        Population
## Population.Schooner Gulch                          Population
## Population.Scott Creek                             Population
## Population.Scott River                             Population
## Population.Smith River                             Population
## Population.Solstice Creek                          Population
## Population.Soquel Creek                            Population
## Population.South Fork Eel River                    Population
## Population.South Fork Noyo River                   Population
## Population.South Fork Ten Mile River               Population
## Population.Ten Mile River                          Population
## Population.Topanga Creek                           Population
## Population.Trancas Creek                           Population
## Population.Trinity River                           Population
## Population.Usal Creek                              Population
## Population.Ventura River                           Population
## Population.Waddell Creek                           Population
## Population.Wages Creek                             Population
## Population.Zuma Creek                              Population
##                                                                                        idx
## Population.Albion River                                                       Albion River
## Population.Alder Creek                                                         Alder Creek
## Population.Aptos Creek                                                         Aptos Creek
## Population.Arroyo Hondo Creek                                           Arroyo Hondo Creek
## Population.Arroyo Sequit Creek                                         Arroyo Sequit Creek
## Population.Big Basin sub-region                                       Big Basin sub-region
## Population.Big Basin/San Mateo region                           Big Basin/San Mateo region
## Population.Big River                                                             Big River
## Population.Big Salmon Creek                                               Big Salmon Creek
## Population.Big Sur River                                                     Big Sur River
## Population.Big Sycamore Creek                                           Big Sycamore Creek
## Population.Bogus Creek                                                         Bogus Creek
## Population.Brush Creek                                                         Brush Creek
## Population.Carmel River                                                       Carmel River
## Population.Carpinteria Creek                                             Carpinteria Creek
## Population.Caspar Creek                                                       Caspar Creek
## Population.Central Coastal Diversity Stratum             Central Coastal Diversity Stratum
## Population.Cheda Creek                                                         Cheda Creek
## Population.Cottaneva Creek                                                 Cottaneva Creek
## Population.DeHaven Creek                                                     DeHaven Creek
## Population.Dry Creek                                                             Dry Creek
## Population.Eel River                                                             Eel River
## Population.Elk Creek                                                             Elk Creek
## Population.Freshwater Creek                                               Freshwater Creek
## Population.Garcia River                                                       Garcia River
## Population.Gazos Creek                                                         Gazos Creek
## Population.Greenwood Creek                                                 Greenwood Creek
## Population.Gualala River                                                     Gualala River
## Population.Hare Creek                                                           Hare Creek
## Population.Hilton Creek                                                       Hilton Creek
## Population.Humboldt Bay                                                       Humboldt Bay
## Population.Juan Creek                                                           Juan Creek
## Population.Lagunitas Creek                                                 Lagunitas Creek
## Population.Las Flores Creek                                               Las Flores Creek
## Population.Little River                                                       Little River
## Population.Lower Eel River and Van Duzen River         Lower Eel River and Van Duzen River
## Population.Lower Mainstem Eel River                               Lower Mainstem Eel River
## Population.Malibu Creek                                                       Malibu Creek
## Population.Mattole River                                                     Mattole River
## Population.Mendocino Coast                                                 Mendocino Coast
## Population.Mill Creek                                                           Mill Creek
## Population.Navarro River                                                     Navarro River
## Population.North Fork Navarro River                               North Fork Navarro River
## Population.North-Central Coastal Diversity Stratum North-Central Coastal Diversity Stratum
## Population.Noyo River                                                           Noyo River
## Population.Olema Creek                                                         Olema Creek
## Population.Pescadero Creek                                                 Pescadero Creek
## Population.Pine Gulch Creek                                               Pine Gulch Creek
## Population.Prairie Creek                                                     Prairie Creek
## Population.Pudding Creek                                                     Pudding Creek
## Population.Redwood Creek                                                     Redwood Creek
## Population.Redwood Creek (Marin Co)                               Redwood Creek (Marin Co)
## Population.Russian River                                                     Russian River
## Population.Salinas River                                                     Salinas River
## Population.Salsipuedes Creek                                             Salsipuedes Creek
## Population.San Gregorio Creek                                           San Gregorio Creek
## Population.San Lorenzo River                                             San Lorenzo River
## Population.San Mateo sub-region                                       San Mateo sub-region
## Population.San Vicente Creek                                             San Vicente Creek
## Population.Santa Clara River                                             Santa Clara River
## Population.Santa Ynez River                                               Santa Ynez River
## Population.Schooner Gulch                                                   Schooner Gulch
## Population.Scott Creek                                                         Scott Creek
## Population.Scott River                                                         Scott River
## Population.Smith River                                                         Smith River
## Population.Solstice Creek                                                   Solstice Creek
## Population.Soquel Creek                                                       Soquel Creek
## Population.South Fork Eel River                                       South Fork Eel River
## Population.South Fork Noyo River                                     South Fork Noyo River
## Population.South Fork Ten Mile River                             South Fork Ten Mile River
## Population.Ten Mile River                                                   Ten Mile River
## Population.Topanga Creek                                                     Topanga Creek
## Population.Trancas Creek                                                     Trancas Creek
## Population.Trinity River                                                     Trinity River
## Population.Usal Creek                                                           Usal Creek
## Population.Ventura River                                                     Ventura River
## Population.Waddell Creek                                                     Waddell Creek
## Population.Wages Creek                                                         Wages Creek
## Population.Zuma Creek                                                           Zuma Creek

Assumptions of OLS

Linear in Parameters

My data do not necessarily appear to be linear in parameters, which may cause the model to violate the first assumption of OLS.

# Counts against year with a straight-line fit (no confidence band), as a
# visual check of linearity.
steelhead_all |>
  ggplot(aes(x = year, y = Value)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).

X has variation

This is true

Assumption 4

The residuals do not appear to be normally distributed.

# Refit the pooled steelhead model for the residual check. The formula matches
# the pooled regression reported earlier, including the percent_protected main
# effect, so the histogram shows the residuals of the model being interpreted.
model <- lm(
  Value ~ year + year:percent_protected + percent_protected,
  data = steelhead_all
)

# Store the residuals in a named column so the plot maps a column of its own
# data instead of reaching back into the model object from inside aes().
residuals <- data.frame(residual = model$residuals)

# bins = 30 is what stat_bin was already using by default; stating it avoids
# the "Pick better value" message.
ggplot(data = residuals, aes(x = residual)) +
  geom_histogram(bins = 30)

Coho

# Coho salmon records only.
coho <- all_data |>
  filter(Species == "Coho salmon")

# One row per population and brood year, keeping the largest estimate (and the
# largest percent_protected) when a year has several records.
# `.groups = "drop"` returns an ungrouped tibble so later steps do not
# silently inherit the Population grouping.
coho2 <- coho |>
  group_by(Population, Brood_year) |>
  summarize(
    Value = max(Value),
    percent_protected = max(percent_protected),
    .groups = "drop"
  )

# First year, last year and number of brood years available per population.
# Note: this overwrites the steelhead table of the same name.
data_years <- coho2 |>
  group_by(Population) |>
  summarize(
    min_year = min(Brood_year),
    max_year = max(Brood_year),
    total_year = n()
  )


# Quick look at the coho counts by brood year.
ggplot(coho2, aes(x = Brood_year, y = Value)) +
  geom_point()
## Warning: Removed 3 rows containing missing values (`geom_point()`).

# Numeric year for the regression, then the Population fixed-effects model
# with the same terms as the steelhead specification.
coho_final <- mutate(coho2, year = as.numeric(Brood_year))

felm(
  Value ~ year + year:percent_protected + percent_protected | Population,
  data = coho_final
) |>
  summary()
## 
## Call:
##    felm(formula = Value ~ year + year:percent_protected + percent_protected |      Population, data = coho_final) 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3147.5  -132.8   -23.3    70.0  7329.2 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)   
## year                     35.9542    11.0639   3.250  0.00122 **
## percent_protected      1022.9582   344.0071   2.974  0.00307 **
## year:percent_protected   -0.5055     0.1705  -2.965  0.00315 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 772 on 562 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared(full model): 0.5481   Adjusted R-squared: 0.4951 
## Multiple R-squared(proj model): 0.01889   Adjusted R-squared: -0.09633 
## F-statistic(full model):10.33 on 66 and 562 DF, p-value: < 2.2e-16 
## F-statistic(proj model): 3.606 on 3 and 562 DF, p-value: 0.01331

Limitations and Issues

There were a lot of NAs in the year protected: roughly 32% of the polygons I used. The definition of protected area was very loose. The data were not consistent over time, so I only used data from 2008-2018. Some watersheds within the study have hatchery releases that are not consistent over time, which can wildly affect the population from year to year.